Introduction

This script is used to analyze siRNA screen validation data obtained with High-Content Image analysis. In particular, the siRNA library used is the Dharmacon library generated by Andria Schibler and Pedja Jevtic, based on the primary siRNA screen results using the Epigenetics and Custom Nuclear Envelope libraries from ThermoFisher.

The siRNA library was originally received dried in 96-well plates at 0.25 nmol per well. It was resuspended and mixed in 50 ul nuclease-free water to obtain a final concentration of 5 uM, then frozen overnight and thawed to increase siRNA oligo solubility. 45 ul were aspirated from the original source plate and dispensed into separate wells to generate a 384-well master plate. These siRNAs occupied columns 1 to 22 (column 22 only partially). All these liquid handling operations were performed using a PerkinElmer Janus instrument, which output the liquid handling operation logs as text files. 384-well imaging-ready plates containing spotted siRNA (300 nl) were generated using an Echo525. 300 nl of control siRNA (5 uM) were then added to their respective wells in columns 22, 23 and 24. The imaging assay-ready plates were sealed with aluminum sealing foil and stored at -20 °C until use.

Imaging assay plates were dried, frozen and then used in reverse transfection experiments. For reverse transfection, plates were thawed and spun, and 20 ul of Optimem + 0.05 ul/well of RNAiMax were added to the plates and incubated for 30 min. Then 20 ul of cells were added on top of the siRNA/RNAiMax mix and incubated for 72 hrs.

Fixed and stained plates were imaged on an Opera QEHS microscope using a 40X water immersion objective. Images were analyzed in Columbus 2.8.1, and image analysis results were exported as tab-delimited .txt files.

The script reads library reformatting files used on the Echo (Containing the siRNA layout), the Columbus image analysis results and generates statistical analysis.

Ab markers:

Load packages.

library(plyr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::arrange()   masks plyr::arrange()
## ✖ purrr::compact()   masks plyr::compact()
## ✖ dplyr::count()     masks plyr::count()
## ✖ dplyr::desc()      masks plyr::desc()
## ✖ dplyr::failwith()  masks plyr::failwith()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::id()        masks plyr::id()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ dplyr::mutate()    masks plyr::mutate()
## ✖ dplyr::rename()    masks plyr::rename()
## ✖ dplyr::summarise() masks plyr::summarise()
## ✖ dplyr::summarize() masks plyr::summarize()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
library(data.table)
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
library(ggthemes)
library(viridis)
## Loading required package: viridisLite

Read the siRNA layouts and generate a control layout

Set channel variable names.

# Marker labels for the two intensity channels; both are pasted into the
# names of the output files written at the end of the script.
green_name <- "LMNB"
red_name <- "LMNA"

Read the siRNA layout information provided by Dharmacon and select only relevant columns. The input is the custom cherry-pick worklist file that was generated by the Janus to reformat the siRNA oligos from 96-well to 384-well plates.

dt_siRNA <- fread(input = "Cherry_Pick_Worklist.csv")

setnames(dt_siRNA, c("dest_col", 
                  "dest_row",
                  "dest_well"), 
                c("Column",
                  "Row",
                  "WellName"))

glimpse(dt_siRNA)
## Rows: 340
## Columns: 17
## $ .id            <chr> "LP_48199 G-CUSTOM-366598.csv", "LP_48199 G-CUSTOM-3665…
## $ source_rack    <chr> "Plate 1", "Plate 1", "Plate 1", "Plate 1", "Plate 1", …
## $ source_well    <chr> "A02", "B02", "C02", "D02", "E02", "F02", "G02", "H02",…
## $ oligo_id       <chr> "D-009329-01", "D-003477-18", "D-011653-01", "D-007452-…
## $ gene_symbol    <chr> "ACAA1", "CREBBP", "EYA3", "SLC22A18", "RPA3", "TP53BP1…
## $ gene_id        <int> 30, 1387, 2140, 5002, 6119, 7158, 8520, 9631, 30, 1387,…
## $ gene_accession <chr> "NM_001607", "NM_001079846", "NM_001990", "NM_002555", …
## $ gi_number      <int> 6598316, 119943101, 26667242, 34734074, 52851430, 50321…
## $ sequence       <chr> "GAGAUUGCCUGAUUCCUAU", "UCACAGAGAUCCAGGGCGA", "GAUUAUAC…
## $ source_pos     <int> 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, …
## $ source_col     <int> 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4…
## $ source_row     <int> 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3…
## $ source_row2    <chr> "A", "B", "C", "D", "E", "F", "G", "H", "A", "B", "C", …
## $ dest_pos       <int> 84, 72, 340, 225, 89, 66, 87, 221, 254, 127, 30, 44, 16…
## $ Column         <int> 6, 5, 22, 15, 6, 5, 6, 14, 16, 8, 2, 3, 11, 18, 1, 5, 9…
## $ Row            <int> 4, 8, 4, 1, 9, 2, 7, 13, 14, 15, 14, 12, 8, 14, 5, 5, 4…
## $ WellName       <chr> "D6", "H5", "D22", "A15", "I6", "B5", "G6", "M14", "N16…

Create a control layout data.table that contains the positions of the library (sample), empty wells (empty), and controls (negative, positive1 or LMNB1, positive2 or SYNE2, positive3 or LMNA, and killer).

# Start from a complete 24-column x 16-row grid: one row per well (384 wells).
dt_control <- data.table(Column = rep(1:24, each = 16), Row = rep(1:16, 24))
# Well names are the row letter followed by the column number, e.g. "A1".
dt_control[, WellName := paste0(LETTERS[Row], Column)]
# Columns 1-21 plus rows 1-4 of column 22 are labelled as library samples.
dt_control[Column %in% 1:21, treatment := "sample"]
dt_control[Column == 22 & Row %in% 1:4, treatment := "sample"]
# Remainder of column 22: rows 5-12 are LMNA, rows 13-16 are empty.
dt_control[Column == 22 & Row %in% 5:12, treatment := "LMNA"]
dt_control[Column == 22 & Row %in% 13:16, treatment := "empty"]
# Column 23 alternates negative (odd rows) and killer (even rows).
dt_control[Column %in% 23 & Row %in% seq(1, 16, 2), treatment := "negative"]
dt_control[Column %in% 23 & Row %in% seq(2, 16, 2), treatment := "killer"]
# Column 24 alternates LMNB1 (odd rows) and SYNE2 (even rows).
dt_control[Column %in% 24 & Row %in% seq(1, 16, 2), treatment := "LMNB1"]
dt_control[Column %in% 24 & Row %in% seq(2, 16, 2), treatment := "SYNE2"]

glimpse(dt_control)
## Rows: 384
## Columns: 4
## $ Column    <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, …
## $ Row       <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2,…
## $ WellName  <chr> "A1", "B1", "C1", "D1", "E1", "F1", "G1", "H1", "I1", "J1", …
## $ treatment <chr> "sample", "sample", "sample", "sample", "sample", "sample", …

Merge the control layout and the siRNA layout data.tables.

# data.table join X[Y, on = ...]: every row of dt_control (all 384 wells) is
# kept and the matching dt_siRNA columns are attached; wells without an siRNA
# entry get NA in those columns. Both tables have a WellName column, so the
# copy coming from dt_control shows up as i.WellName in the result.
dt_layout <- dt_siRNA[dt_control, on = c("Column", "Row")]

dt_layout
##                               .id source_rack source_well    oligo_id
##   1: LP_48199 G-CUSTOM-366598.csv     Plate 3         A08 D-005060-01
##   2: LP_48199 G-CUSTOM-366598.csv     Plate 2         D09 D-006963-02
##   3: LP_48199 G-CUSTOM-366598.csv     Plate 3         F08 D-007094-01
##   4: LP_48199 G-CUSTOM-366598.csv     Plate 2         C04 D-013383-01
##   5: LP_48199 G-CUSTOM-366598.csv     Plate 1         G03 D-011490-02
##  ---                                                                 
## 380:                         <NA>        <NA>        <NA>        <NA>
## 381:                         <NA>        <NA>        <NA>        <NA>
## 382:                         <NA>        <NA>        <NA>        <NA>
## 383:                         <NA>        <NA>        <NA>        <NA>
## 384:                         <NA>        <NA>        <NA>        <NA>
##      gene_symbol gene_id gene_accession gi_number            sequence
##   1:       CHMP6   79643      NM_024591  52851447 UCACCCAGAUCGAAAUGAA
##   2:        BRD1   23774      NM_014577  11321641 GUUAGAAGCUCAAGGGUAU
##   3:       PCGF1   84759      NM_032673 109240537 GAACAAGUAUGUCCGAUGU
##   4:        WDR5   11091      NM_052821  61744460 GAGAGUGGCUGGCAAGUUC
##   5:        HAT1    8520   NM_001033085  74319830 GAAGAUUACCGGCGUGUUA
##  ---                                                                 
## 380:        <NA>      NA           <NA>        NA                <NA>
## 381:        <NA>      NA           <NA>        NA                <NA>
## 382:        <NA>      NA           <NA>        NA                <NA>
## 383:        <NA>      NA           <NA>        NA                <NA>
## 384:        <NA>      NA           <NA>        NA                <NA>
##      source_pos source_col source_row source_row2 dest_pos Column Row WellName
##   1:         57          8          1           A        1      1   1       A1
##   2:         68          9          4           D        2      1   2       B1
##   3:         62          8          6           F        3      1   3       C1
##   4:         27          4          3           C        4      1   4       D1
##   5:         23          3          7           G        5      1   5       E1
##  ---                                                                          
## 380:         NA         NA         NA        <NA>       NA     24  12     <NA>
## 381:         NA         NA         NA        <NA>       NA     24  13     <NA>
## 382:         NA         NA         NA        <NA>       NA     24  14     <NA>
## 383:         NA         NA         NA        <NA>       NA     24  15     <NA>
## 384:         NA         NA         NA        <NA>       NA     24  16     <NA>
##      i.WellName treatment
##   1:         A1    sample
##   2:         B1    sample
##   3:         C1    sample
##   4:         D1    sample
##   5:         E1    sample
##  ---                     
## 380:        L24     SYNE2
## 381:        M24     LMNB1
## 382:        N24     SYNE2
## 383:        O24     LMNB1
## 384:        P24     SYNE2

Read and process the Columbus data

Set RegEx patterns for directory searches for logs file data and spot data on a per protocol step basis.

# Regular expression (dir() takes a regex, not a glob) for Columbus results
# files: any file whose name ends in ".result.1.txt".
pat_col <- "\\.result\\.1\\.txt$"

Create a list of the RegEx patterns set in the previous chunk. Important: the list names will be carried over all the next steps!!!

pat_list <- list(col = pat_col)

pat_list

Recursively search the input directory and its subdirectories for files whose names match the RegEx patterns defined two chunks above. The list_files function outputs full file paths (including the directory). path_list is a list containing all the file names, with one element per pattern.

# List the files below `path` whose names match a regular expression.
#
# x:    regular expression matched against file names.
# path: directory to search recursively; defaults to "input".
# Returns a character vector of file paths that include `path` as a prefix.
list_files <- function(x, path = "input") {
  dir(
    path = path,
    pattern = x,
    full.names = TRUE,
    recursive = TRUE,
    # Directories are left out: the result is handed to a file reader, so a
    # folder whose name happens to match the pattern must not be returned.
    include.dirs = FALSE
  )
}

path_list <- llply(pat_list, list_files) 

path_list

Extract file names from absolute path and set them as list element names.

# Label each path with its bare file name.
#
# x: character vector of file paths.
# Returns `x` unchanged except that names(x) is set to basename(x), so the
# file name can later be used to identify the source of each record.
trim_names <- function(x) {
  setNames(x, basename(x))
}

path_list <- llply(path_list, trim_names) 

Recursively read and merge object level data files as data.frames. Rows are labeled with relative filenames (the .id variable). This and the previous chunks are slightly modified tricks adopted from H. Wickham's “Tidy Data” paper.

# Read every file in `x` and stack the results into one data.table.
#
# x: named character vector of file paths. The names become the `.id` column
#    that records which file each row came from.
#    NOTE(review): with an unnamed `x`, `.id` holds the integer position of
#    the file instead; callers in this script always pass named paths.
# Returns a single data.table; columns missing from some files are filled
# with NA.
read_merge <- function(x) {
  # integer64 = "character" makes fread return 64-bit integer columns as
  # character strings.
  tables <- lapply(x, fread, integer64 = "character")
  rbindlist(tables, use.names = TRUE, fill = TRUE, idcol = ".id")
}

dt_list <- llply(path_list, read_merge)

Separate Columbus data from the other classes of data.

dt_col <- dt_list$col

rm(dt_list)

glimpse(dt_col)
## Rows: 768
## Columns: 22
## $ .id                                                                        <chr> …
## $ ScreenName                                                                 <chr> …
## $ ScreenID                                                                   <int> …
## $ PlateName                                                                  <chr> …
## $ PlateID                                                                    <int> …
## $ MeasurementDate                                                            <dttm> …
## $ MeasurementID                                                              <int> …
## $ WellName                                                                   <chr> …
## $ Row                                                                        <int> …
## $ Column                                                                     <int> …
## $ Timepoint                                                                  <int> …
## $ Plane                                                                      <int> …
## $ `Nuclei Selected - Number of Objects`                                      <int> …
## $ `Nuclei Selected - Nucleus Area [µm²] - Mean per Well`                     <dbl> …
## $ `Nuclei Selected - Nucleus Roundness - Mean per Well`                      <dbl> …
## $ `Nuclei Selected - Nucleus Width [µm] - Mean per Well`                     <dbl> …
## $ `Nuclei Selected - Nucleus Length [µm] - Mean per Well`                    <dbl> …
## $ `Nuclei Selected - Nucleus Ratio Width to Length - Mean per Well`          <dbl> …
## $ `Nuclei Selected - Intensity Nucleus Region Exp2Cam1 Mean - Mean per Well` <dbl> …
## $ `Nuclei Selected - Intensity Nucleus Region Exp3Cam2 Mean - Mean per Well` <dbl> …
## $ `Number of Analyzed Fields`                                                <int> …
## $ Link                                                                       <chr> …

Rename variables

setnames(dt_col, c("Nuclei Selected - Number of Objects",
                   "Nuclei Selected - Nucleus Area [µm²] - Mean per Well",
                   "Nuclei Selected - Nucleus Roundness - Mean per Well",
                   "Nuclei Selected - Intensity Nucleus Region Exp2Cam1 Mean - Mean per Well",
                   "Nuclei Selected - Intensity Nucleus Region Exp3Cam2 Mean - Mean per Well"),
                 c("cell_number",
                   "nuc_area",
                   "nuc_roundness",
                   "nuc_green_int",
                   "nuc_red_int"))

glimpse(dt_col)
## Rows: 768
## Columns: 22
## $ .id                                                               <chr> "Sha…
## $ ScreenName                                                        <chr> "180…
## $ ScreenID                                                          <int> 3273…
## $ PlateName                                                         <chr> "HTI…
## $ PlateID                                                           <int> 3822…
## $ MeasurementDate                                                   <dttm> 201…
## $ MeasurementID                                                     <int> 3747…
## $ WellName                                                          <chr> "A1"…
## $ Row                                                               <int> 1, 1…
## $ Column                                                            <int> 1, 2…
## $ Timepoint                                                         <int> 1, 1…
## $ Plane                                                             <int> 1, 1…
## $ cell_number                                                       <int> 420,…
## $ nuc_area                                                          <dbl> 177.…
## $ nuc_roundness                                                     <dbl> 0.92…
## $ `Nuclei Selected - Nucleus Width [µm] - Mean per Well`            <dbl> 11.1…
## $ `Nuclei Selected - Nucleus Length [µm] - Mean per Well`           <dbl> 18.5…
## $ `Nuclei Selected - Nucleus Ratio Width to Length - Mean per Well` <dbl> 0.60…
## $ nuc_green_int                                                     <dbl> 2771…
## $ nuc_red_int                                                       <dbl> 278.…
## $ `Number of Analyzed Fields`                                       <int> 30, …
## $ Link                                                              <chr> "htt…

Merge the Columbus measurements data with the layout data.

# Attach the layout annotation to the Columbus measurements by well position.
# X[Y, on = ...] returns one row for every match of each dt_layout row in
# dt_col, so each well appears once per plate file that was read. The
# WellName columns carried by dt_layout come through as i.WellName and
# i.WellName.1.
dt_data <- dt_col[dt_layout, on = c("Column", "Row")]

glimpse(dt_data)
## Rows: 768
## Columns: 39
## $ .id                                                               <chr> "Sha…
## $ ScreenName                                                        <chr> "180…
## $ ScreenID                                                          <int> 3273…
## $ PlateName                                                         <chr> "HTI…
## $ PlateID                                                           <int> 3822…
## $ MeasurementDate                                                   <dttm> 201…
## $ MeasurementID                                                     <int> 3747…
## $ WellName                                                          <chr> "A1"…
## $ Row                                                               <int> 1, 1…
## $ Column                                                            <int> 1, 1…
## $ Timepoint                                                         <int> 1, 1…
## $ Plane                                                             <int> 1, 1…
## $ cell_number                                                       <int> 420,…
## $ nuc_area                                                          <dbl> 177.…
## $ nuc_roundness                                                     <dbl> 0.92…
## $ `Nuclei Selected - Nucleus Width [µm] - Mean per Well`            <dbl> 11.1…
## $ `Nuclei Selected - Nucleus Length [µm] - Mean per Well`           <dbl> 18.5…
## $ `Nuclei Selected - Nucleus Ratio Width to Length - Mean per Well` <dbl> 0.60…
## $ nuc_green_int                                                     <dbl> 2771…
## $ nuc_red_int                                                       <dbl> 278.…
## $ `Number of Analyzed Fields`                                       <int> 30, …
## $ Link                                                              <chr> "htt…
## $ i..id                                                             <chr> "LP_…
## $ source_rack                                                       <chr> "Pla…
## $ source_well                                                       <chr> "A08…
## $ oligo_id                                                          <chr> "D-0…
## $ gene_symbol                                                       <chr> "CHM…
## $ gene_id                                                           <int> 7964…
## $ gene_accession                                                    <chr> "NM_…
## $ gi_number                                                         <int> 5285…
## $ sequence                                                          <chr> "UCA…
## $ source_pos                                                        <int> 57, …
## $ source_col                                                        <int> 8, 8…
## $ source_row                                                        <int> 1, 1…
## $ source_row2                                                       <chr> "A",…
## $ dest_pos                                                          <int> 1, 1…
## $ i.WellName                                                        <chr> "A1"…
## $ i.WellName.1                                                      <chr> "A1"…
## $ treatment                                                         <chr> "sam…

Plot the data

Quite a few wells in HTIF00183 clearly had an issue, either due to the sample itself, or to the autofocus. For this reason, these wells will be eliminated from the analysis.

excluded <- c("M7", "N7", "O7", "P7", "O8", "P8")

dt_data <- dt_data[!(PlateName == "HTIF00183" & WellName %in% excluded),]

Calculate the mean and standard deviation (SD) for the negative control siRNA on a per plate basis.

# Reference statistics from the negative-control wells only: mean and SD of
# each measurement, computed separately for every results file (.id), i.e.
# once per plate. NA measurements are ignored.
dt_norm <-
    dt_data[treatment == "negative", .(
    neg_n_cells_mean = mean(cell_number, na.rm = TRUE),
    neg_n_cells_sd = sd(cell_number, na.rm = TRUE),
    neg_area_mean = mean(nuc_area, na.rm = TRUE),
    neg_area_sd = sd(nuc_area, na.rm = TRUE),
    neg_round_mean = mean(nuc_roundness, na.rm = TRUE),
    neg_round_sd = sd(nuc_roundness, na.rm = TRUE),
    neg_nuc_red_mean = mean(nuc_red_int, na.rm = TRUE),
    neg_nuc_red_sd = sd(nuc_red_int, na.rm = TRUE),
    neg_nuc_green_mean = mean(nuc_green_int, na.rm = TRUE),
    neg_nuc_green_sd = sd(nuc_green_int, na.rm = TRUE)
    ), by = .id]

glimpse(dt_norm)
## Rows: 2
## Columns: 11
## $ .id                <chr> "Shape_Size_Intensity_hEpigenetics_Val[159522].resu…
## $ neg_n_cells_mean   <dbl> 403.125, 308.375
## $ neg_n_cells_sd     <dbl> 48.40141, 29.04645
## $ neg_area_mean      <dbl> 192.2003, 192.9557
## $ neg_area_sd        <dbl> 6.202427, 4.306389
## $ neg_round_mean     <dbl> 0.9634322, 0.9740030
## $ neg_round_sd       <dbl> 0.004896193, 0.002212123
## $ neg_nuc_red_mean   <dbl> 251.4881, 309.4958
## $ neg_nuc_red_sd     <dbl> 37.80239, 15.59650
## $ neg_nuc_green_mean <dbl> 1189.074, 1411.724
## $ neg_nuc_green_sd   <dbl> 257.6809, 142.6345

Calculate Z-scores based on the mean and SD of the negative controls. Also calculate the negative control normalized values (On a per plate basis).

dt_data <- dt_data[dt_norm, on = ".id"]

# Standardize a measurement against a reference distribution.
#
# measurement: numeric vector of observed values.
# average:     reference mean (here: the negative-control mean).
# s_dev:       reference standard deviation.
# Returns the number of reference SDs each value lies from the reference mean.
z_score <- function(measurement, average, s_dev) {
  deviation <- measurement - average
  deviation / s_dev
}

# Express a measurement as a percentage of a reference value.
#
# measurement: numeric vector of observed values.
# average:     reference mean (here: the negative-control mean).
# Returns 100 when the measurement equals the reference.
norm_change <- function(measurement, average) {
  ratio <- measurement / average
  100 * ratio
}

# Add, in place, a Z-score and a percent-of-negative-control column for each
# of the five measurements, using the per-plate negative-control mean and SD
# columns that were joined onto dt_data above.
dt_data[, `:=`(n_cells_z_score = z_score(cell_number, neg_n_cells_mean, neg_n_cells_sd),
               n_cells_norm_change = norm_change(cell_number, neg_n_cells_mean),
               area_z_score = z_score(nuc_area, neg_area_mean, neg_area_sd),
               area_norm_change = norm_change(nuc_area, neg_area_mean),
               round_z_score = z_score(nuc_roundness, neg_round_mean, neg_round_sd),
               round_norm_change = norm_change(nuc_roundness, neg_round_mean),
               nuc_red_z_score = z_score(nuc_red_int, neg_nuc_red_mean, neg_nuc_red_sd),
               nuc_red_norm_change = norm_change(nuc_red_int, neg_nuc_red_mean),
               nuc_green_z_score = z_score(nuc_green_int, neg_nuc_green_mean, neg_nuc_green_sd),
               nuc_green_norm_change = norm_change(nuc_green_int, neg_nuc_green_mean))]

glimpse(dt_data)
## Rows: 762
## Columns: 59
## $ .id                                                               <chr> "Sha…
## $ ScreenName                                                        <chr> "180…
## $ ScreenID                                                          <int> 3273…
## $ PlateName                                                         <chr> "HTI…
## $ PlateID                                                           <int> 3822…
## $ MeasurementDate                                                   <dttm> 201…
## $ MeasurementID                                                     <int> 3747…
## $ WellName                                                          <chr> "A1"…
## $ Row                                                               <int> 1, 2…
## $ Column                                                            <int> 1, 1…
## $ Timepoint                                                         <int> 1, 1…
## $ Plane                                                             <int> 1, 1…
## $ cell_number                                                       <int> 420,…
## $ nuc_area                                                          <dbl> 177.…
## $ nuc_roundness                                                     <dbl> 0.92…
## $ `Nuclei Selected - Nucleus Width [µm] - Mean per Well`            <dbl> 11.1…
## $ `Nuclei Selected - Nucleus Length [µm] - Mean per Well`           <dbl> 18.5…
## $ `Nuclei Selected - Nucleus Ratio Width to Length - Mean per Well` <dbl> 0.60…
## $ nuc_green_int                                                     <dbl> 2771…
## $ nuc_red_int                                                       <dbl> 278.…
## $ `Number of Analyzed Fields`                                       <int> 30, …
## $ Link                                                              <chr> "htt…
## $ i..id                                                             <chr> "LP_…
## $ source_rack                                                       <chr> "Pla…
## $ source_well                                                       <chr> "A08…
## $ oligo_id                                                          <chr> "D-0…
## $ gene_symbol                                                       <chr> "CHM…
## $ gene_id                                                           <int> 7964…
## $ gene_accession                                                    <chr> "NM_…
## $ gi_number                                                         <int> 5285…
## $ sequence                                                          <chr> "UCA…
## $ source_pos                                                        <int> 57, …
## $ source_col                                                        <int> 8, 9…
## $ source_row                                                        <int> 1, 4…
## $ source_row2                                                       <chr> "A",…
## $ dest_pos                                                          <int> 1, 2…
## $ i.WellName                                                        <chr> "A1"…
## $ i.WellName.1                                                      <chr> "A1"…
## $ treatment                                                         <chr> "sam…
## $ neg_n_cells_mean                                                  <dbl> 403.…
## $ neg_n_cells_sd                                                    <dbl> 48.4…
## $ neg_area_mean                                                     <dbl> 192.…
## $ neg_area_sd                                                       <dbl> 6.20…
## $ neg_round_mean                                                    <dbl> 0.96…
## $ neg_round_sd                                                      <dbl> 0.00…
## $ neg_nuc_red_mean                                                  <dbl> 251.…
## $ neg_nuc_red_sd                                                    <dbl> 37.8…
## $ neg_nuc_green_mean                                                <dbl> 1189…
## $ neg_nuc_green_sd                                                  <dbl> 257.…
## $ n_cells_z_score                                                   <dbl> 0.34…
## $ n_cells_norm_change                                               <dbl> 104.…
## $ area_z_score                                                      <dbl> -2.3…
## $ area_norm_change                                                  <dbl> 92.3…
## $ round_z_score                                                     <dbl> -8.1…
## $ round_norm_change                                                 <dbl> 95.8…
## $ nuc_red_z_score                                                   <dbl> 0.70…
## $ nuc_red_norm_change                                               <dbl> 110.…
## $ nuc_green_z_score                                                 <dbl> 6.13…
## $ nuc_green_norm_change                                             <dbl> 233.…

Write the data to a tab-delimited .txt file for further analysis

# Export the per-well normalized results as a tab-delimited text file.
normalized_path <- paste("output/validation", red_name, green_name,
                         "normalized_results.txt", sep = "_")
# write.table() does not create missing folders, so make sure the output
# directory exists (no-op when it is already there).
dir.create(dirname(normalized_path), showWarnings = FALSE, recursive = TRUE)
write.table(dt_data,
            file = normalized_path,
            quote = FALSE,
            sep = "\t",
            row.names = FALSE,
            col.names = TRUE)

Create a novel unique identifier for each oligo that contains also the gene symbol.

dt_data[,oligo_name := paste(gene_symbol, str_extract(oligo_id, "[0-9]{2}$"), sep = "-")]

Aggregate the biological repeats (The two different plates) by calculating the mean and SD for all the variables (Z-scores and fold changes). n = 2.

# Collapse the plate replicates into one row per oligo, reporting the mean and
# SD of every Z-score / percent-of-control column.
dt_aggregated <- dt_data %>%
        # `treatment` is part of the key because control and empty wells carry
        # no gene/oligo annotation (all NA); without it every control type
        # would be pooled into one meaningless "NA-NA" row. Library wells all
        # share treatment == "sample", so their grouping is unaffected.
        group_by(gene_symbol,
                 gene_id,
                 oligo_id,
                 oligo_name,
                 sequence,
                 treatment) %>%
        # Named functions give self-describing output columns
        # (<variable>_mean, <variable>_sd) instead of <variable>_1 / _2.
        summarise(across(n_cells_z_score:nuc_green_norm_change,
                         list(mean = mean, sd = sd)),
                  .groups = "drop") %>%
        arrange(oligo_name, treatment)

# Export the per-oligo aggregated results as a tab-delimited text file.
aggregated_path <- paste("output/validation", red_name, green_name,
                         "aggregated_results.txt", sep = "_")
# write.table() does not create missing folders, so make sure the output
# directory exists (no-op when it is already there).
dir.create(dirname(aggregated_path), showWarnings = FALSE, recursive = TRUE)
write.table(dt_aggregated,
            file = aggregated_path,
            quote = FALSE,
            sep = "\t",
            row.names = FALSE,
            col.names = TRUE)

Document the information about the analysis session

sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] viridis_0.6.2     viridisLite_0.4.1 ggthemes_4.2.4    data.table_1.14.8
##  [5] knitr_1.42        lubridate_1.9.2   forcats_1.0.0     stringr_1.5.0    
##  [9] dplyr_1.1.1       purrr_1.0.1       readr_2.1.4       tidyr_1.3.0      
## [13] tibble_3.2.1      ggplot2_3.4.1     tidyverse_2.0.0   plyr_1.8.8       
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.10       digest_0.6.31     utf8_1.2.3        R6_2.5.1         
##  [5] backports_1.4.1   evaluate_0.20     highr_0.10        pillar_1.9.0     
##  [9] rlang_1.1.0       rstudioapi_0.14   jquerylib_0.1.4   rpart_4.1.19     
## [13] checkmate_2.1.0   rmarkdown_2.21    labeling_0.4.2    foreign_0.8-84   
## [17] htmlwidgets_1.6.2 munsell_0.5.0     compiler_4.2.3    xfun_0.38        
## [21] pkgconfig_2.0.3   base64enc_0.1-3   htmltools_0.5.5   nnet_7.3-18      
## [25] tidyselect_1.2.0  gridExtra_2.3     htmlTable_2.4.1   Hmisc_5.0-1      
## [29] fansi_1.0.4       tzdb_0.3.0        withr_2.5.0       grid_4.2.3       
## [33] jsonlite_1.8.4    gtable_0.3.3      lifecycle_1.0.3   magrittr_2.0.3   
## [37] scales_1.2.1      cli_3.6.1         stringi_1.7.12    cachem_1.0.7     
## [41] farver_2.1.1      bslib_0.4.2       generics_0.1.3    vctrs_0.6.1      
## [45] Formula_1.2-5     tools_4.2.3       glue_1.6.2        hms_1.1.3        
## [49] fastmap_1.1.1     yaml_2.3.7        timechange_0.2.0  colorspace_2.1-0 
## [53] cluster_2.1.4     sass_0.4.5